from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
import pandas as pd
from iso3166 import countries
import plotly.express as px
uri = "mongodb://localhost:27017/?retryWrites=true&w=majority"
client = MongoClient(uri, server_api=ServerApi('1'))
database = client["OpenAlexEnvironmental"]
collection = database["journals"]
print(f"Total number of Articles: {collection.count_documents({})}")
Total number of Articles: 0
uri = "mongodb://localhost:27017/?retryWrites=true&w=majority"
client = MongoClient(uri, server_api=ServerApi('1'))
database = client["test_db"]
collection = database["journals"]
print(f"Total number of Articles: {collection.count_documents({})}")
Total number of Articles: 1039248
collection.find_one()
{'_id': ObjectId('64d2f6d8adb21da659ac9df5'),
'title': 'Triclosan exposure, transformation, and human health effects',
'language': 'en',
'publication_year': 2017,
'publication_date': '2017-11-17',
'type': 'article',
'primary_location': {'is_oa': False,
'landing_page_url': 'https://doi.org/10.1080/10937404.2017.1399306',
'pdf_url': None,
'source': {'id': 'https://openalex.org/S96923654',
'display_name': 'Journal of Toxicology and Environmental Health-part B-critical Reviews',
'issn_l': '1093-7404',
'issn': ['1521-6950', '1093-7404'],
'is_oa': False,
'is_in_doaj': False,
'host_organization': 'https://openalex.org/P4310320547',
'host_organization_name': 'Taylor & Francis',
'host_organization_lineage': ['https://openalex.org/P4310320547'],
'host_organization_lineage_names': ['Taylor & Francis'],
'type': 'journal'},
'license': None,
'version': None},
'authorships': [{'author_position': 'first',
'author': {'id': 'https://openalex.org/A5003146474',
'display_name': 'Lisa M. Weatherly',
'orcid': None},
'institutions': [{'id': 'https://openalex.org/I7947594',
'display_name': 'University of Maine',
'ror': 'https://ror.org/01adr0w49',
'country_code': 'US',
'type': 'education'}],
'countries': ['US'],
'is_corresponding': False,
'raw_affiliation_string': 'a Graduate School of Biomedical Science and Engineering , University of Maine , Orono , ME , USA.',
'raw_affiliation_strings': ['a Graduate School of Biomedical Science and Engineering , University of Maine , Orono , ME , USA.']},
{'author_position': 'last',
'author': {'id': 'https://openalex.org/A5012277599',
'display_name': 'Julie A. Gosse',
'orcid': 'https://orcid.org/0000-0002-2457-4039'},
'institutions': [{'id': 'https://openalex.org/I7947594',
'display_name': 'University of Maine',
'ror': 'https://ror.org/01adr0w49',
'country_code': 'US',
'type': 'education'}],
'countries': ['US'],
'is_corresponding': False,
'raw_affiliation_string': 'b Department of Molecular and Biomedical Sciences , University of Maine , Orono , ME , USA.',
'raw_affiliation_strings': ['b Department of Molecular and Biomedical Sciences , University of Maine , Orono , ME , USA.']}],
'biblio': {'volume': '20',
'issue': '8',
'first_page': '447',
'last_page': '469'},
'concepts': [{'id': 'https://openalex.org/C2781289450',
'wikidata': 'https://www.wikidata.org/wiki/Q408646',
'display_name': 'Triclosan',
'level': 2,
'score': 0.9709624},
{'id': 'https://openalex.org/C3018890749',
'wikidata': 'https://www.wikidata.org/wiki/Q204711',
'display_name': 'Food and drug administration',
'level': 2,
'score': 0.68914425},
{'id': 'https://openalex.org/C143432726',
'wikidata': 'https://www.wikidata.org/wiki/Q520181',
'display_name': 'Hand sanitizer',
'level': 2,
'score': 0.63388824},
{'id': 'https://openalex.org/C2776866151',
'wikidata': 'https://www.wikidata.org/wiki/Q35855',
'display_name': 'Toothpaste',
'level': 2,
'score': 0.5975416},
{'id': 'https://openalex.org/C2987857752',
'wikidata': 'https://www.wikidata.org/wiki/Q12147',
'display_name': 'Human health',
'level': 2,
'score': 0.5535524},
{'id': 'https://openalex.org/C71924100',
'wikidata': 'https://www.wikidata.org/wiki/Q11190',
'display_name': 'Medicine',
'level': 0,
'score': 0.47784802},
{'id': 'https://openalex.org/C2908647359',
'wikidata': 'https://www.wikidata.org/wiki/Q2625603',
'display_name': 'Population',
'level': 2,
'score': 0.4776762},
{'id': 'https://openalex.org/C2780035454',
'wikidata': 'https://www.wikidata.org/wiki/Q8386',
'display_name': 'Drug',
'level': 2,
'score': 0.46354717},
{'id': 'https://openalex.org/C98274493',
'wikidata': 'https://www.wikidata.org/wiki/Q128406',
'display_name': 'Pharmacology',
'level': 1,
'score': 0.3782525},
{'id': 'https://openalex.org/C99454951',
'wikidata': 'https://www.wikidata.org/wiki/Q932068',
'display_name': 'Environmental health',
'level': 1,
'score': 0.3534659},
{'id': 'https://openalex.org/C199343813',
'wikidata': 'https://www.wikidata.org/wiki/Q12128',
'display_name': 'Dentistry',
'level': 1,
'score': 0.16849786},
{'id': 'https://openalex.org/C142724271',
'wikidata': 'https://www.wikidata.org/wiki/Q7208',
'display_name': 'Pathology',
'level': 1,
'score': 0.12375158}],
'abstract_inverted_index': {'Triclosan': [0],
'(TCS)': [1],
'is': [2, 14, 70],
'an': [3],
'antimicrobial': [4],
'used': [5],
'so': [6],
'ubiquitously': [7],
'that': [8],
'75%': [9],
'of': [10, 89],
'the': [11, 37, 41],
'US': [12, 42],
'population': [13],
'likely': [15],
'exposed': [16],
'to': [17, 93],
'this': [18, 90],
'compound': [19],
'via': [20],
'consumer': [21],
'goods': [22],
'and': [23, 44, 66, 76, 79, 85, 98, 104],
'personal': [24, 57],
'care': [25, 58],
'products.': [26],
'In': [27],
'September': [28],
'2016,': [29],
'TCS': [30, 49, 69, 95, 116],
'was': [31, 92, 122],
'banned': [32],
'from': [33],
'soap': [34],
'products': [35, 59],
'following': [36],
'risk': [38],
'assessment': [39],
'by': [40],
'Food': [43],
'Drug': [45],
'Administration': [46],
'(FDA).': [47],
'However,': [48],
'still': [50],
'remains,': [51],
'at': [52],
'high': [53],
'concentrations,': [54],
'in': [55, 81],
'other': [56],
'such': [60, 118],
'as': [61, 100, 102, 119],
'toothpaste,': [62],
'mouthwash,': [63],
'hand': [64],
'sanitizer,': [65],
'surgical': [67],
'soaps.': [68],
'readily': [71],
'absorbed': [72],
'into': [73],
'human': [74, 83, 111],
'skin': [75],
'oral': [77],
'mucosa': [78],
'found': [80],
'various': [82],
'tissues': [84],
'fluids.': [86],
'The': [87, 107],
'aim': [88],
'review': [91],
'describe': [94],
'exposure': [96],
'routes': [97],
'levels': [99],
'well': [101],
'metabolism': [103],
'transformation': [105],
'processes.': [106],
'burgeoning': [108],
'literature': [109],
'on': [110],
'health': [112],
'effects': [113],
'associated': [114],
'with': [115],
'exposure,': [117],
'reproductive': [120],
'problems,': [121],
'also': [123],
'summarized.': [124]},
'cited_by_count': 308,
'cited_by_api_url': 'https://api.openalex.org/works?filter=cites:W2768246307'}
publications_data_list=[]
c=0
for x in collection.find():
if(len(x['authorships'])>0):
for aut in x['authorships']:
if(aut['author_position']=='first' and len(aut['institutions'])>0 and ('country_code' in aut['institutions'][0].keys())):
first_author_ctrycode=aut['institutions'][0]['country_code']
if(len(x['authorships'])>0):
for aut in x['authorships']:
if(aut['author_position']=='last' and len(aut['institutions'])>0 and ('country_code' in aut['institutions'][0].keys())):
last_author_ctrycode=aut['institutions'][0]['country_code']
if(c%10000==0):
print(c, end="--")
c=c+1
publications_data_list.append((x['_id'],x['publication_year'],x['type'],first_author_ctrycode, last_author_ctrycode,x['cited_by_count']))
0--10000--20000--30000--40000--50000--60000--70000--80000--90000--100000--110000--120000--130000--140000--150000--160000--170000--180000--190000--200000--210000--220000--230000--240000--250000--260000--270000--280000--290000--300000--310000--320000--330000--340000--350000--360000--370000--380000--390000--400000--410000--420000--430000--440000--450000--460000--470000--480000--490000--500000--510000--520000--530000--540000--550000--560000--570000--580000--590000--600000--610000--620000--630000--640000--650000--660000--670000--680000--690000--700000--710000--720000--730000--740000--750000--760000--770000--780000--790000--800000--810000--820000--830000--840000--850000--860000--870000--880000--890000--900000--910000--920000--930000--940000--950000--960000--970000--980000--990000--1000000--1010000--1020000--1030000--
publications_data_list[0]
(ObjectId('64d2f6d8adb21da659ac9df5'), 2017, 'article', 'US', 'US', 308)
publications_df = pd.DataFrame(publications_data_list, columns =['id', 'year', 'type','first_aut_cc','last_aut_cc','total_citations'])
publications_df.head()
| id | year | type | first_aut_cc | last_aut_cc | total_citations | |
|---|---|---|---|---|---|---|
| 0 | 64d2f6d8adb21da659ac9df5 | 2017 | article | US | US | 308 |
| 1 | 64d2f6d8adb21da659ac9df6 | 2018 | article | DK | DK | 191 |
| 2 | 64d2f6d8adb21da659ac9df7 | 2013 | article | DK | DK | 174 |
| 3 | 64d2f6d8adb21da659ac9df8 | 2014 | article | GB | GB | 148 |
| 4 | 64d2f6d8adb21da659ac9df9 | 2013 | article | US | US | 139 |
firstauthor_pub_cnt=publications_df.loc[:, ['first_aut_cc','id']].groupby('first_aut_cc').count()
lastauthor_pub_cnt=publications_df.loc[:, ['last_aut_cc','id']].groupby('last_aut_cc').count()
firstauthor_pub_citations=publications_df.loc[:, ['first_aut_cc','total_citations']].groupby('first_aut_cc').sum()
lastauthor_pub_citations=publications_df.loc[:, ['last_aut_cc','total_citations']].groupby('last_aut_cc').sum()
firstauthor_pub_cnt.shape
(198, 1)
firstauthor_pub_cnt.head()
| id | |
|---|---|
| first_aut_cc | |
| AD | 2 |
| AE | 1078 |
| AF | 28 |
| AL | 91 |
| AM | 120 |
lastauthor_pub_cnt.shape
(205, 1)
firstauthor_pub_cnt=firstauthor_pub_cnt.merge(firstauthor_pub_citations,on='first_aut_cc')
lastauthor_pub_cnt=lastauthor_pub_cnt.merge(lastauthor_pub_citations,on='last_aut_cc')
first_aut_ctry = []
last_aut_ctry=[]
first_aut_cc3 = []
last_aut_cc3=[]
c=0
for idx in firstauthor_pub_cnt.index:
if(idx is not None):
first_aut_ctry.append(countries.get(idx).name)
#print(countries.get(idx).name)
first_aut_cc3.append(countries.get(idx).alpha3)
else:
first_aut_ctry.append(None)
first_aut_cc3.append(None)
for idx in lastauthor_pub_cnt.index:
if(idx is not None):
last_aut_ctry.append(countries.get(idx).name)
last_aut_cc3.append(countries.get(idx).alpha3)
else:
last_aut_ctry.append(None)
last_aut_cc3.append(None)
firstauthor_pub_cnt['Country']=first_aut_ctry
firstauthor_pub_cnt['Country Code']=first_aut_cc3
lastauthor_pub_cnt['Country']=last_aut_ctry
lastauthor_pub_cnt['Country Code']=last_aut_cc3
lastauthor_pub_cnt=lastauthor_pub_cnt.rename(columns={'id':'Articles Count'})
lastauthor_pub_cnt.head()
| Articles Count | total_citations | Country | Country Code | |
|---|---|---|---|---|
| last_aut_cc | ||||
| AD | 7 | 75 | Andorra | AND |
| AE | 1132 | 20848 | United Arab Emirates | ARE |
| AF | 27 | 233 | Afghanistan | AFG |
| AL | 61 | 435 | Albania | ALB |
| AM | 107 | 868 | Armenia | ARM |
firstauthor_pub_cnt=firstauthor_pub_cnt.rename(columns={'id':'Articles Count'})
firstauthor_pub_cnt.head()
| Articles Count | total_citations | Country | Country Code | |
|---|---|---|---|---|
| first_aut_cc | ||||
| AD | 2 | 89 | Andorra | AND |
| AE | 1078 | 20707 | United Arab Emirates | ARE |
| AF | 28 | 167 | Afghanistan | AFG |
| AL | 91 | 1268 | Albania | ALB |
| AM | 120 | 830 | Armenia | ARM |
income_level_data=pd.read_excel('world bank income division.xlsx')
income_level_data2=pd.read_excel('world bank income2.xlsx')
gdp_per_capita=pd.read_excel('world bank GDP data.xls')
gdp_data=pd.read_excel('Total GDP.xls')
pop_data=pd.read_excel('Pop_data.xls')
income_level_data=income_level_data.drop(columns={'Income Group Code','Income Group'})
income_level_data=income_level_data.drop_duplicates()
income_level_data.head()
| Country Code | Country | |
|---|---|---|
| 0 | ASM | American Samoa |
| 1 | AND | Andorra |
| 2 | ATG | Antigua and Barbuda |
| 3 | ABW | Aruba |
| 4 | AUS | Australia |
income_level_data2.head()
| Economy | Income group | |
|---|---|---|
| 0 | Aruba | High income |
| 1 | Afghanistan | Low income |
| 2 | Angola | Lower middle income |
| 3 | Albania | Upper middle income |
| 4 | Andorra | High income |
income_level_data2.shape
(218, 2)
income_level_data.shape
(217, 2)
income_level_data=income_level_data.merge(income_level_data2, right_on='Economy', left_on='Country')
income_level_data=income_level_data.drop(columns={'Economy','Country'})
income_level_data.head()
| Country Code | Income group | |
|---|---|---|
| 0 | ASM | High income |
| 1 | AND | High income |
| 2 | ATG | High income |
| 3 | ABW | High income |
| 4 | AUS | High income |
gdp_per_capita=gdp_per_capita.drop(columns={'Indicator Name', 'Country Name'})
gdp_per_capita=gdp_per_capita.rename(columns={'2022':'GDP per capita(US$)'})
gdp_per_capita=gdp_per_capita.drop_duplicates()
gdp_per_capita.head()
| Country Code | GDP per capita(US$) | |
|---|---|---|
| 0 | ABW | 29342.100730 |
| 1 | AFE | 1622.391720 |
| 2 | AFG | 363.674087 |
| 3 | AFW | 1790.348800 |
| 4 | AGO | 2998.501158 |
gdp_data=gdp_data.drop(columns={'Indicator Name', 'Country Name'})
gdp_data=gdp_data.rename(columns={'2022': 'GDP Total(US$)'})
gdp_data=gdp_data.drop_duplicates()
gdp_data.head()
| Country Code | GDP Total(US$) | Unnamed: 4 | Unnamed: 5 | Unnamed: 6 | |
|---|---|---|---|---|---|
| 0 | ABW | 3.126019e+09 | NaN | NaN | NaN |
| 1 | AFE | 1.169484e+12 | NaN | NaN | NaN |
| 2 | AFG | 1.458314e+10 | NaN | NaN | NaN |
| 3 | AFW | 8.778633e+11 | NaN | NaN | NaN |
| 4 | AGO | 1.067136e+11 | NaN | NaN | NaN |
pop_data=pop_data.drop(columns={'Indicator Name', 'Country Name'})
pop_data=pop_data.rename(columns={'2022': 'Population'})
pop_data=pop_data.drop_duplicates()
#pop_data.Population=pop_data.Population.astype(int)
pop_data.head()
| Country Code | Population | |
|---|---|---|
| 0 | ABW | 106445.0 |
| 1 | AFE | 720839314.0 |
| 2 | AFG | 41128771.0 |
| 3 | AFW | 490330870.0 |
| 4 | AGO | 35588987.0 |
first_author_data=firstauthor_pub_cnt.merge(income_level_data, on='Country Code',how='left')
first_author_data=first_author_data.merge(gdp_data, on='Country Code', how='left')
first_author_data=first_author_data.merge(gdp_per_capita, on='Country Code', how='left')
first_author_data=first_author_data.merge(pop_data,on='Country Code', how='left')
first_author_data.head()
| Articles Count | total_citations | Country | Country Code | Income group | GDP Total(US$) | Unnamed: 4 | Unnamed: 5 | Unnamed: 6 | GDP per capita(US$) | Population | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2 | 89 | Andorra | AND | High income | 3.352033e+09 | NaN | NaN | NaN | 41992.793358 | 79824.0 |
| 1 | 1078 | 20707 | United Arab Emirates | ARE | High income | 5.075349e+11 | NaN | NaN | NaN | 53757.863251 | 9441129.0 |
| 2 | 28 | 167 | Afghanistan | AFG | Low income | 1.458314e+10 | NaN | NaN | NaN | 363.674087 | 41128771.0 |
| 3 | 91 | 1268 | Albania | ALB | Upper middle income | 1.888210e+10 | NaN | NaN | NaN | 6802.804519 | 2775634.0 |
| 4 | 120 | 830 | Armenia | ARM | Upper middle income | 1.950278e+10 | NaN | NaN | NaN | 7014.206592 | 2780469.0 |
last_author_data=lastauthor_pub_cnt.merge(income_level_data, on='Country Code',how='left')
last_author_data=last_author_data.merge(gdp_data, on='Country Code', how='left')
last_author_data=last_author_data.merge(gdp_per_capita, on='Country Code', how='left')
last_author_data=last_author_data.merge(pop_data,on='Country Code', how='left')
last_author_data.head()
| Articles Count | total_citations | Country | Country Code | Income group | GDP Total(US$) | Unnamed: 4 | Unnamed: 5 | Unnamed: 6 | GDP per capita(US$) | Population | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 7 | 75 | Andorra | AND | High income | 3.352033e+09 | NaN | NaN | NaN | 41992.793358 | 79824.0 |
| 1 | 1132 | 20848 | United Arab Emirates | ARE | High income | 5.075349e+11 | NaN | NaN | NaN | 53757.863251 | 9441129.0 |
| 2 | 27 | 233 | Afghanistan | AFG | Low income | 1.458314e+10 | NaN | NaN | NaN | 363.674087 | 41128771.0 |
| 3 | 61 | 435 | Albania | ALB | Upper middle income | 1.888210e+10 | NaN | NaN | NaN | 6802.804519 | 2775634.0 |
| 4 | 107 | 868 | Armenia | ARM | Upper middle income | 1.950278e+10 | NaN | NaN | NaN | 7014.206592 | 2780469.0 |
first_Income_group_3levels=[]
for income in first_author_data['Income group']:
if(income=='High income'):
first_Income_group_3levels.append('High income')
elif(income=='Upper middle income' or income=='Lower middle income'):
first_Income_group_3levels.append('Middle income')
elif(income=='Low income'):
first_Income_group_3levels.append('Low income')
else:
first_Income_group_3levels.append('None')
last_Income_group_3levels=[]
for income in last_author_data['Income group']:
if(income=='High income'):
last_Income_group_3levels.append('High income')
elif(income=='Upper middle income' or income=='Lower middle income'):
last_Income_group_3levels.append('Middle income')
elif(income=='Low income'):
last_Income_group_3levels.append('Low income')
else:
last_Income_group_3levels.append('None')
first_author_data['Income_group_3levels']=first_Income_group_3levels
last_author_data['Income_group_3levels']=last_Income_group_3levels
first_author_data=first_author_data.sort_values(by=['Articles Count'], ascending=False)
last_author_data=last_author_data.sort_values(by=['Articles Count'], ascending=False)
first_author_data.head()
| Articles Count | total_citations | Country | Country Code | Income group | GDP Total(US$) | Unnamed: 4 | Unnamed: 5 | Unnamed: 6 | GDP per capita(US$) | Population | Income_group_3levels | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 36 | 280766 | 5206537 | China | CHN | Upper middle income | 1.796317e+13 | NaN | NaN | NaN | 12720.215640 | 1.412175e+09 | Middle income |
| 185 | 128822 | 2904817 | United States of America | USA | High income | 2.546270e+13 | NaN | NaN | NaN | 76398.591742 | 3.332876e+08 | High income |
| 80 | 40010 | 704320 | India | IND | Lower middle income | 3.385090e+12 | NaN | NaN | NaN | 2388.621198 | 1.417173e+09 | Middle income |
| 61 | 31336 | 856868 | United Kingdom of Great Britain and Northern I... | GBR | High income | 3.070668e+12 | NaN | NaN | NaN | 45850.426122 | 6.697141e+07 | High income |
| 53 | 31162 | 634026 | Spain | ESP | High income | 1.397509e+12 | NaN | NaN | NaN | 29350.168521 | 4.761503e+07 | High income |
last_author_data.head()
| Articles Count | total_citations | Country | Country Code | Income group | GDP Total(US$) | Unnamed: 4 | Unnamed: 5 | Unnamed: 6 | GDP per capita(US$) | Population | Income_group_3levels | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 36 | 265616 | 4830412 | China | CHN | Upper middle income | 1.796317e+13 | NaN | NaN | NaN | 12720.215640 | 1.412175e+09 | Middle income |
| 191 | 134193 | 3154211 | United States of America | USA | High income | 2.546270e+13 | NaN | NaN | NaN | 76398.591742 | 3.332876e+08 | High income |
| 83 | 39164 | 671137 | India | IND | Lower middle income | 3.385090e+12 | NaN | NaN | NaN | 2388.621198 | 1.417173e+09 | Middle income |
| 62 | 33098 | 906292 | United Kingdom of Great Britain and Northern I... | GBR | High income | 3.070668e+12 | NaN | NaN | NaN | 45850.426122 | 6.697141e+07 | High income |
| 53 | 33064 | 641088 | Spain | ESP | High income | 1.397509e+12 | NaN | NaN | NaN | 29350.168521 | 4.761503e+07 | High income |
print(first_author_data.shape)
print(last_author_data.shape)
first_author_data.to_excel("First_author_ctry_corresponding_data.xlsx")
last_author_data.to_excel("Last_author_ctry_corresponding_data.xlsx")
(198, 12) (205, 12)
first_author_data=pd.read_excel('First_author_ctry_corresponding_data.xlsx')
last_author_data=pd.read_excel('Last_author_ctry_corresponding_data.xlsx')
print("First author country Null Income levels", first_author_data['Income group'].isna().sum())
print("First author country Null GDP per capita", first_author_data['GDP per capita(US$)'].isna().sum())
print("First author country Null GDP Total",first_author_data['GDP Total(US$)'].isna().sum())
print("First author country Null Population",first_author_data['Population'].isna().sum())
print("Last author country Null Income levels", last_author_data['Income group'].isna().sum())
print("Last author country Null GDP per capita", last_author_data['GDP per capita(US$)'].isna().sum())
print("Last author country Null GDP Total", last_author_data['GDP Total(US$)'].isna().sum())
print("Last author country Null Population", last_author_data['Population'].isna().sum())
First author country Null Income levels 8 First author country Null GDP per capita 11 First author country Null GDP Total 10 First author country Null Population 8 Last author country Null Income levels 9 Last author country Null GDP per capita 13 Last author country Null GDP Total 12 Last author country Null Population 9
first_author_ctry_data=first_author_data.dropna(axis=0, subset=['Income group', 'GDP per capita(US$)','GDP Total(US$)','Population'])
last_author_ctry_data=last_author_data.dropna(axis=0, subset=['Income group','GDP per capita(US$)','GDP Total(US$)','Population'])
print(first_author_ctry_data.shape)
print(last_author_ctry_data.shape)
(186, 13) (191, 13)
first_author_ctry_data=first_author_ctry_data.loc[:,['Articles Count','total_citations','Country','Country Code','Income group','GDP Total(US$)','GDP per capita(US$)','Population','Income_group_3levels']]
last_author_ctry_data=last_author_ctry_data.loc[:,['Articles Count','total_citations','Country','Country Code','Income group','GDP Total(US$)','GDP per capita(US$)','Population','Income_group_3levels']]
first_author_ctry_data['citations_multiple']=first_author_ctry_data['Articles Count']*first_author_ctry_data['total_citations']
last_author_ctry_data['citations_multiple']=last_author_ctry_data['Articles Count']*last_author_ctry_data['total_citations']
first_author_ctry_data['normalized_citations']=(first_author_ctry_data['Articles Count']*first_author_ctry_data['total_citations'])/(first_author_ctry_data['Population'])
last_author_ctry_data['normalized_citations']=(last_author_ctry_data['Articles Count']*last_author_ctry_data['total_citations'])/(last_author_ctry_data['Population'])
first_author_ctry_data['normalized_citations1']=(first_author_ctry_data['Articles Count']*first_author_ctry_data['total_citations'])/(first_author_ctry_data['GDP Total(US$)'])
last_author_ctry_data['normalized_citations1']=(last_author_ctry_data['Articles Count']*last_author_ctry_data['total_citations'])/(last_author_ctry_data['GDP Total(US$)'])
first_author_ctry_data['normalized_citations2']=(first_author_ctry_data['Articles Count']*first_author_ctry_data['total_citations'])/(first_author_ctry_data['GDP per capita(US$)'])
last_author_ctry_data['normalized_citations2']=(last_author_ctry_data['Articles Count']*last_author_ctry_data['total_citations'])/(last_author_ctry_data['GDP per capita(US$)'])
first_author_ctry_data['normalised_cit']=(first_author_ctry_data['total_citations']/first_author_ctry_data['Articles Count'])
last_author_ctry_data['normalised_cit']=(last_author_ctry_data['total_citations']/last_author_ctry_data['Articles Count'])
first_author_ctry_data['normalised_cit2']=((first_author_ctry_data['total_citations']*first_author_ctry_data['GDP per capita(US$)'])/first_author_ctry_data['Articles Count'])
last_author_ctry_data['normalised_cit2']=((last_author_ctry_data['total_citations']*first_author_ctry_data['GDP per capita(US$)'])/last_author_ctry_data['Articles Count'])
fig = px.scatter(first_author_ctry_data, x="GDP Total(US$)", y='citations_multiple', color='Income group',hover_data=['Country'],
log_y=True,log_x=True,
labels={"y": "Log scale of (Articles Count)", "GDP Total(US$)": "Log scale of GDP"},
title='First Author Publications count normalized by citations vs GDP with Corresponding Income level',
category_orders={'Income group':['High income', 'Upper middle income', 'Lower middle income','Low income']}, width=1000, height=800)
fig.show()
fig = px.scatter(first_author_ctry_data, x="GDP per capita(US$)", y='normalized_citations', color='Income group',hover_data=['Country'],
log_y=True,log_x=True,
labels={"y": "Log scale of ( normalized Articles Count)", "GDP Total(US$)": "Log scale of GDP"},
title='First Author Publications count normalized by citations and population vs GDP with Corresponding Income level',
category_orders={'Income group':['High income', 'Upper middle income', 'Lower middle income','Low income']}, width=1000, height=800)
fig.show()
fig = px.scatter(first_author_ctry_data, x="GDP per capita(US$)", y='normalised_cit2', color='Income group',hover_data=['Country'],
log_y=True,log_x=True,
labels={"y": "Log scale of ( normalized Articles Count)", "GDP Total(US$)": "Log scale of GDP"},
title='citations *GDP per capita/First Author Publications count normalized vs GDP per capita with Corresponding Income level',
category_orders={'Income group':['High income', 'Upper middle income', 'Lower middle income','Low income']}, width=1000, height=800)
fig.show()
fig = px.scatter(first_author_ctry_data, x="GDP per capita(US$)", y='normalised_cit', color='Income group',hover_data=['Country'],
log_x=True,
labels={"y": "Log scale of ( normalized Articles Count)", "GDP Total(US$)": "Log scale of GDP"},
title='First Author Publications count normalized by citations and population vs GDP with Corresponding Income level',
category_orders={'Income group':['High income', 'Upper middle income', 'Lower middle income','Low income']}, width=1000, height=800)
fig.show()
fig = px.scatter(first_author_ctry_data, x="GDP Total(US$)", y='normalised_cit', color='Income group',hover_data=['Country'],
log_x=True,
labels={"y": "Log scale of ( normalized Articles Count)", "GDP Total(US$)": "Log scale of GDP"},
title='First Author Publications count normalized by citations and population vs GDP with Corresponding Income level',
category_orders={'Income group':['High income', 'Upper middle income', 'Lower middle income','Low income']}, width=1000, height=800)
fig.show()
first_author_ctry_data['normalized_citations']=(first_author_ctry_data['Articles Count']*first_author_ctry_data['total_citations'])/(first_author_ctry_data['Population']*first_author_ctry_data['GDP Total(US$)'])
last_author_ctry_data['normalized_citations']=(last_author_ctry_data['Articles Count']*last_author_ctry_data['total_citations'])/((last_author_ctry_data['Population']*last_author_ctry_data['GDP Total(US$)']))
fig = px.scatter(first_author_ctry_data, y="normalized_citations", x='GDP per capita(US$)', color='Income group',hover_data=['Country'],
log_x=True, log_y=True,
labels={"y": "Log scale of ( normalized Articles Count)", "GDP Total(US$)": "Log scale of GDP"},
title='First Author Publications count normalized by citations and population vs GDP with Corresponding Income level',
category_orders={'Income group':['High income', 'Upper middle income', 'Lower middle income','Low income']}, width=1000, height=800)
fig.show()
fig = px.scatter(first_author_ctry_data, y="normalized_citations", x='normalized_citations', color='Income group',hover_data=['Country'],
log_x=True, log_y=True,
labels={"y": "Log scale of ( normalized Articles Count)", "GDP Total(US$)": "Log scale of GDP"},
title='First Author Publications count normalized by citations and population vs GDP with Corresponding Income level',
category_orders={'Income group':['High income', 'Upper middle income', 'Lower middle income','Low income']}, width=1000, height=800)
fig.show()
##(articles count*citations)/population
fig=px.bar(first_author_ctry_data, x='Country', y='normalized_citations', color='Income group', log_y=True, color_discrete_map={
'High income': 'blue',
'Upper middle income' : 'green',
'Lower middle income': 'yellow',
'Low income' : 'red'},
labels={"Articles Count": "Log of Articles Count"},
title='First Author Publications count multiplied by citations and divided by population with Corresponding Income level',
category_orders={'Income group':['High income', 'Upper middle income', 'Lower middle income','Low income']}, width=1500, height=800, text_auto=True)
fig.update_layout(xaxis_categoryorder = 'total descending')
fig.update_layout(
xaxis = dict(
tickfont = dict(size=5)))
fig.update_traces(cliponaxis=False)
'''
fig.update_layout(color_discrete_map={
'High income': 'blue',
'Upper middle income' : 'green',
'Lower middle income': 'yellow',
'Low Income' : 'red'})
'''
#fig.write_html("/Users/prathimagodha/Desktop/plot1.html")
fig.show()
##(articles count*citations)/gdp
fig=px.bar(first_author_ctry_data, x='Country', y='normalized_citations1', color='Income group', log_y=True, color_discrete_map={
'High income': 'blue',
'Upper middle income' : 'green',
'Lower middle income': 'yellow',
'Low income' : 'red'},
labels={"Articles Count": "Log of Articles Count"},
title='First Author Publications count multiplied by citations and divided by GDP with Corresponding Income level',
category_orders={'Income group':['High income', 'Upper middle income', 'Lower middle income','Low income']}, width=1500, height=800, text_auto=True)
fig.update_layout(xaxis_categoryorder = 'total descending')
fig.update_layout(
xaxis = dict(
tickfont = dict(size=5)))
fig.update_traces(cliponaxis=False)
'''
fig.update_layout(color_discrete_map={
'High income': 'blue',
'Upper middle income' : 'green',
'Lower middle income': 'yellow',
'Low Income' : 'red'})
'''
#fig.write_html("/Users/prathimagodha/Desktop/plot1.html")
fig.show()
#(articles count*citations)/gdp per capita
fig=px.bar(first_author_ctry_data, x='Country', y='normalized_citations2', color='Income group', log_y=True, color_discrete_map={
'High income': 'blue',
'Upper middle income' : 'green',
'Lower middle income': 'yellow',
'Low income' : 'red'},
labels={"Articles Count": "Log of Articles Count"},
title='First Author Publications count multiplied by citations and divided by GDP per capita with Corresponding Income level',
category_orders={'Income group':['High income', 'Upper middle income', 'Lower middle income','Low income']}, width=1500, height=800, text_auto=True)
fig.update_layout(xaxis_categoryorder = 'total descending')
fig.update_layout(
xaxis = dict(
tickfont = dict(size=5)))
fig.update_traces(cliponaxis=False)
'''
fig.update_layout(color_discrete_map={
'High income': 'blue',
'Upper middle income' : 'green',
'Lower middle income': 'yellow',
'Low Income' : 'red'})
'''
#fig.write_html("/Users/prathimagodha/Desktop/plot1.html")
fig.show()
#(articles citations*gdp per capita)/ articles count
fig=px.bar(first_author_ctry_data, x='Country', y='normalised_cit2', color='Income group', log_y=True, color_discrete_map={
'High income': 'blue',
'Upper middle income' : 'green',
'Lower middle income': 'yellow',
'Low income' : 'red'},
labels={"Articles Count": "Log of Articles Count"},
title='First Author Publications count multiplied by citations and divided by GDP per capita with Corresponding Income level',
category_orders={'Income group':['High income', 'Upper middle income', 'Lower middle income','Low income']}, width=1500, height=800, text_auto=True)
fig.update_layout(xaxis_categoryorder = 'total descending')
fig.update_layout(
xaxis = dict(
tickfont = dict(size=5)))
fig.update_traces(cliponaxis=False)
'''
fig.update_layout(color_discrete_map={
'High income': 'blue',
'Upper middle income' : 'green',
'Lower middle income': 'yellow',
'Low Income' : 'red'})
'''
#fig.write_html("/Users/prathimagodha/Desktop/plot1.html")
fig.show()